1. Import

# Libraries
library(ggplot2)
library(cowplot)
library(Seurat)
library(SeuratWrappers)
library(harmony)
library(Matrix)
library(tidyverse)
# Data (sorted human iNKT cells from 3 subjects)
path.plots <- "~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/00_Reproduce_UMAPs"
path.data <- "~/Projects/20220809_Thymic-iNKT-CrossSpecies/data/raw_data/human_data"
human5 <- read.csv(file.path(path.data, "CUThy13_220225_SampleTag05_hs_NKT_RSEC_MolsPerCell.csv"), sep=",", header=T, skip=7) # 404 cells
human8 <- read.csv(file.path(path.data, "CUTHY11BDRscRNA_seq_091621_SampleTag08_hs_NKT_RSEC_MolsPerCell.csv"), sep=",", header=T, skip=8) # 1913 cells
human12 <- read.csv(file.path(path.data, "CUTHY12BDRscRNA_seq_211101_SampleTag12_hs_NKT_RSEC_MolsPerCell.csv"), sep=",", header=T, skip=8) # 344 cells

# Current df format:
# Rownames || Cell_Index | Gene1 | Gene2 | ...
#   -      ||   421953   |   0   |   0   | ...
#   -      ||   459121   |   0   |   3   | ...

# Invert the dataframes (cells as columns and genes as rows) and transform to dgCMatrix
convert_to_matrix <- function(df){
  rownames(df) <- df$Cell_Index
  df$Cell_Index <- NULL
  df <- t(df)
  df <- Matrix(df, sparse=T)
  return(df)
}

mat.hu5  <- convert_to_matrix(human5) # 404 cells
mat.hu8  <- convert_to_matrix(human8) # 1913 cells
mat.hu12 <- convert_to_matrix(human12) # 344 cells

2. Preprocessing

# Create Seurat Object, keep only features expressed in at least 1 cell
seur.h5  <- CreateSeuratObject(mat.hu5, project="Hu_Thymus_NKT_5", min.cells=1)
seur.h8  <- CreateSeuratObject(mat.hu8, project="Hu_Thymus_NKT_8", min.cells=1)
seur.h12 <- CreateSeuratObject(mat.hu12, project="Hu_Thymus_NKT_12", min.cells=1)

# Combine into one seurat object
seur.combined <- merge(seur.h5, y=c(seur.h8,seur.h12), add.cell.ids=c('Hu5', 'Hu8', 'Hu12'), project="HuNKT") # 2661 cells and 18,520 features
print(seur.combined)
## An object of class Seurat 
## 18520 features across 2661 samples within 1 assay 
## Active assay: RNA (18520 features, 0 variable features)
# QC
seur.combined[["percent.mt"]] <- PercentageFeatureSet(seur.combined, pattern = "^MT\\.")
head(seur.combined@meta.data, 5)
FeatureScatter(seur.combined, feature1 = "nCount_RNA", feature2 = "percent.mt")

FeatureScatter(seur.combined, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

VlnPlot(seur.combined, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size=.01)

# ggsave(file.path(path.plots, "hu_qc.jpeg"), width=10, height=6)
seur.combined <- subset(seur.combined, subset = nFeature_RNA >= 500 & nFeature_RNA < 4000 & nCount_RNA >=500 & percent.mt < 25)
table(seur.combined@meta.data$orig.ident) # 399 cells hu5; 1828 cells hu8; 331 cells hu12
## 
## Hu_Thymus_NKT_12  Hu_Thymus_NKT_5  Hu_Thymus_NKT_8 
##              331              399             1828
# See number of cells and features post-processing
print(seur.combined)
## An object of class Seurat 
## 18520 features across 2558 samples within 1 assay 
## Active assay: RNA (18520 features, 0 variable features)

3. SCTransform and Harmony

Code combining SCTransform and Harmony based on this forum discussion and this seurat tutorial

# Normalize with SCTransform and find variable features
seur.list <- SplitObject(seur.combined, split.by = "orig.ident")
seur.list <- lapply(X = seur.list, FUN = SCTransform) # weird, SCTransform adds features?!
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |======================================================================| 100%
sct.hvg   <- SelectIntegrationFeatures(object.list = seur.list, nfeatures = 2000)

# Merge back seurat objects to run Harmony afterwards
seur.SCT <- merge(seur.list$Hu_Thymus_NKT_5, y = c(seur.list$Hu_Thymus_NKT_8, seur.list$Hu_Thymus_NKT_12),
                      project = "HuNKT", merge.data = TRUE) # weird, now there are 30596 genes
VariableFeatures(seur.SCT) <- sct.hvg


# Run PCA (don't scale data when using SCTransform)
seur.SCT <- RunPCA(object = seur.SCT, assay = "SCT", npcs = 50, verbose=F)
DimPlot(seur.SCT, reduction = "pca", group.by = "orig.ident")

# ElbowPlot(seur.SCT)
# Run Harmony for integration
seur.harmony <- RunHarmony(object = seur.SCT,
                           assay.use = "SCT",
                           reduction = "pca",
                           dims.use = 1:50,
                           group.by.vars = "orig.ident",
                           plot_convergence = F) # converged only after 2 iterations
## Harmony 1/10
## Harmony 2/10
## Harmony converged after 2 iterations
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.SCT.harmony; see ?make.names for more details
## on syntax validity
seur.harmony <- RunUMAP(object = seur.harmony, assay = "SCT", reduction = "harmony", dims = 1:30, verbose=F)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
seur.harmony <- FindNeighbors(object = seur.harmony, assay = "SCT", reduction = "harmony", dims = 1:30, verbose=F) %>%
  FindClusters(resolution = 0.8, verbose=F)


# First glance at UMAP
DimPlot(seur.harmony, pt.size = 0.1, label = T, label.size = 7) + ggtitle("SCTransform+Harmony") + theme(legend.position="none")

# Display UMAP
p1 <- DimPlot(seur.harmony, group.by = c("orig.ident"), pt.size = 0.1, label = F) + theme(legend.position="top", title = element_text(size=0))
p2 <- DimPlot(seur.harmony, pt.size = 0.1, label = TRUE) + labs(title="Integrated (2558 cells)") + theme(legend.position="none")
p3 <- DimPlot(seur.harmony, pt.size = 0.1, split.by = "orig.ident", ncol = 3)
ggdraw()+
  draw_plot(p1, x=0, y=0, width=.25, height=1)+
  draw_plot(p2, x=.25, y=0, width=.25, height=1)+
  draw_plot(p3, x=.5, y=0, width=.5, height=1)

4. LogNormalize and MNN

This workflow is the same used for the mouse NKT data.

# Normalize and find variable features
set.seed(123)
seur.mnn <- NormalizeData(seur.combined) # normalized data is saved in seur.combined[["RNA"]]@data
seur.mnn <- FindVariableFeatures(seur.mnn) # return 2,000 HVGs
# plot variable features with labels
LabelPoints(plot = VariableFeaturePlot(seur.mnn),
            points = head(VariableFeatures(seur.mnn), 10), repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 41 rows containing missing values (geom_point).

# ggsave(file.path(path.plots, "hu_hvg.jpeg"), width=8, height=6, bg="white")


# Run integration & dimension reduction
seur.mnn <- RunFastMNN(object.list = SplitObject(seur.mnn, split.by = "orig.ident"), k = 20, verbose=F)
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_
seur.mnn <- RunUMAP(seur.mnn, reduction = "mnn", dims = 1:30, verbose=F)
seur.mnn <- FindNeighbors(seur.mnn, reduction = "mnn", dims = 1:30, compute.SNN = TRUE, verbose=F)
seur.mnn <- FindClusters(seur.mnn, resolution = 0.8, verbose=F)

# First glance at UMAP
DimPlot(seur.mnn, pt.size = 0.1, label = T, label.size = 7) + ggtitle("LogNormalize + fastMNN") + theme(legend.position="none")

# Display UMAP
# ggsave("~/Downloads/hu_umap.jpeg", width=5, height=5)

p1 <- DimPlot(seur.mnn, group.by = c("orig.ident"), pt.size = 0.1, label = F) + theme(legend.position="top", title = element_text(size=0))
p2 <- DimPlot(seur.mnn, pt.size = 0.1, label = TRUE) + labs(title="Integrated (2558 cells)") + theme(legend.position="none")
p3 <- DimPlot(seur.mnn, pt.size = 0.1, split.by = "orig.ident", ncol = 3)

# jpeg(filename = file.path(path.plots, "hu_umap.jpeg"), width=5000, height=1500, res=300)
ggdraw()+
  draw_plot(p1, x=0, y=0, width=.25, height=1)+
  draw_plot(p2, x=.25, y=0, width=.25, height=1)+
  draw_plot(p3, x=.5, y=0, width=.5, height=1)

5. Compare methods

# Rename cluster columns for merging downstream
head(seur.mnn@meta.data)
colnames(seur.mnn@meta.data)[6] <- "mnn_clusters"
head(seur.harmony@meta.data)
colnames(seur.harmony@meta.data)[8] <- "harmony_clusters"

# Merge the metadata df
metadata <- merge(seur.mnn@meta.data, seur.harmony@meta.data, by=c("row.names", "orig.ident", "nCount_RNA", "nFeature_RNA", "percent.mt"))
rownames(metadata) <- metadata$Row.names
metadata$Row.names <- NULL
head(metadata)
# Replace the metadata df in the seurat objects
seur.mnn@meta.data <- metadata
seur.harmony@meta.data <- metadata

# Sanity checks
DimPlot(seur.mnn, group.by="mnn_clusters", pt.size = 0.1, label = TRUE) + labs(title="LogNorm+MNN") + theme(legend.position="none")

DimPlot(seur.harmony, group.by="harmony_clusters", pt.size = 0.1, label = TRUE) + labs(title="SCT+Harmony") + theme(legend.position="none")

# Check which cluster corresponds to which across methods
mnn1 <- DimPlot(seur.mnn, group.by="mnn_clusters", pt.size = 0.1, label = TRUE) + labs(title="LogNorm+MNN | Clust on MNN") + theme(legend.position="none")
mnn2 <- DimPlot(seur.mnn, group.by="harmony_clusters", pt.size = 0.1, label = TRUE) + labs(title="LogNorm+MNN | Clust on Harmony") + theme(legend.position="none")
harm1 <- DimPlot(seur.harmony, group.by="harmony_clusters", pt.size = 0.1, label = TRUE) + labs(title="SCT+Harmony | Clust on Harmony") + theme(legend.position="none")
harm2 <- DimPlot(seur.harmony, group.by="mnn_clusters", pt.size = 0.1, label = TRUE) + labs(title="SCT+Harmony  | Clust on MNN") + theme(legend.position="none")

ggdraw()+
  draw_plot(mnn1, x=0,  y=.5, width=.5, height=.5)+
  draw_plot(mnn2, x=.5, y=.5, width=.5, height=.5)+
  draw_plot(harm2, x=0, y=0, width=.5, height=.5) +
  draw_plot(harm1, x=.5, y=0, width=.5, height=.5)

# Function to look at genes of interest
plotFeatBoth <- function(gene){
  mnn <- FeaturePlot(seur.mnn, features=gene) +
    labs(title=gene)
  harm <- FeaturePlot(seur.harmony, features=gene) +
    labs(title=gene)
  # Plot both side-by-side
  mnn | harm
}

# Look at genes of interest (with MNN umap on the left, Harmony umap on the right)
genes.of.interest <- c("ZBTB16", "EOMES", "GZMK", "CCR7", "CCR9", "SELL", "PLAC8", "CD69", "CD4", "ISG15", "EGR2", "SLAMF1", "CD44", "NKG7", "KLRB1")
for (i in genes.of.interest){
  print(plotFeatBoth(i))
}

6. Laurent’s code

a) Create seurat object

# Get the matrices into one list
all.data <- list("NKT_8_Thymus" = human8,
                 "NKT_12_Thymus" = human12,
                 "NKT_5_Thymus" = human5)

# Get sample names and gene names
shared_genes <- Reduce(intersect, lapply(X = all.data, FUN = function(x){ colnames(x) }))
shared_genes <- shared_genes[!shared_genes %in% 'Cell_Index']
sample.names <- names(all.data)


# Create seurat objects, keeping only genes present in all 3 individuals/datasets
all.seurat <- lapply(X = 1:length(all.data), FUN = function(x){
  sample.counts <- all.data[[x]]
  rownames(sample.counts) <- sample.counts$Cell_Index
  sample.counts$Cell_Index <- NULL
  transpose.df <- as.data.frame(t(as.matrix(sample.counts))) # have genes as rows, cells as cols
  transpose.df <- transpose.df[match(shared_genes, rownames(transpose.df)), ] # keep only genes present in all 3 individuals
  seurat.obj <- CreateSeuratObject(counts = transpose.df, project = sample.names[x])
})

# Merge into one seurat object
merged_seurat <- merge(x = all.seurat[[1]], y = all.seurat[2:length(all.seurat)],
                       add.cell.ids = sample.names, project = 'Human_Innate') # 2661 cells with 19,693 genes
print(merged_seurat)
## An object of class Seurat 
## 19693 features across 2661 samples within 1 assay 
## Active assay: RNA (19693 features, 0 variable features)

b) Quality-control

# Cell filtering
merged_seurat[["percent.mt"]] <- PercentageFeatureSet(merged_seurat, pattern = "^MT\\.")
VlnPlot(merged_seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0)

filtered_seurat <- subset(merged_seurat, subset = nFeature_RNA >= 500 & nFeature_RNA < 4000 & nCount_RNA >=500 & percent.mt < 25) # 2558 cells
VlnPlot(filtered_seurat, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0)

# Keep only non-ribosomal-mt genes and genes that are present in at least 10 cells
counts <- GetAssayData(object = filtered_seurat, slot = "counts")
nonzero <- counts > 0
keep_genes <- Matrix::rowSums(nonzero) >= 10
genes.to.remove <- rownames(filtered_seurat)[grep(rownames(filtered_seurat), pattern = "^RPL|^RPS|^MT\\.")]
keep_genes[which(names(keep_genes) %in% genes.to.remove)] = FALSE
filtered_counts <- counts[keep_genes, ]

filtered_seurat <- CreateSeuratObject(filtered_counts, meta.data = filtered_seurat@meta.data) # 11,304 genes
print(filtered_seurat)
## An object of class Seurat 
## 11304 features across 2558 samples within 1 assay 
## Active assay: RNA (11304 features, 0 variable features)

c) SCTransform and Harmony

# Import metadata
# sample_metadata <- read.csv(file = "/Volumes/Samsung_T5/Human_MAIT_NKT/Data/Metadata.csv") %>% 
#   dplyr::select(-sample.id, -X, -file)
# 
# rownames(sample_metadata) <- sample_metadata$orig.ident
# cell_metadata <- sample_metadata[filtered_seurat$orig.ident,]
# for(meta in names(cell_metadata)){
#   filtered_seurat[[meta]] <- cell_metadata[[meta]]
# }
# 
# filtered_seurat@meta.data


# Normalize with SCTransform
filtered_seurat <- NormalizeData(filtered_seurat) # why log-normalize before using SCT?...
filtered_seurat <- SCTransform(filtered_seurat, vars.to.regress = c('percent.mt'), 
                               verbose=F, method = "glmGamPoi")

# Run dimension reduction (linear & non-linear)
filtered_seurat <- RunPCA(filtered_seurat, verbose=F)
# ElbowPlot(filtered_seurat)
filtered_seurat <- RunUMAP(filtered_seurat, dims = 1:15, min.dist = 0.3, spread = 0.5, verbose=F)
filtered_seurat <- FindNeighbors(filtered_seurat, dims = 1:15, verbose=F)
filtered_seurat <- FindClusters(filtered_seurat, resolution = 0.7, verbose=F)


# Run integration with Harmony (from the SCTransform data)
set.seed(0229)

DefaultAssay(filtered_seurat) <- "SCT"
# filtered_seurat_Harmony <- RunHarmony(filtered_seurat, group.by.vars = c("Batch", "Method"), assay.use = "SCT",
filtered_seurat_Harmony <- RunHarmony(filtered_seurat,
                                      group.by.vars = c("orig.ident"),
                                      assay.use = "SCT",
                                      max.iter.harmony = 20) # 11 iterations
## Harmony 1/20
## Harmony 2/20
## Harmony 3/20
## Harmony 4/20
## Harmony 5/20
## Harmony 6/20
## Harmony 7/20
## Harmony 8/20
## Harmony 9/20
## Harmony 10/20
## Harmony 11/20
## Harmony converged after 11 iterations
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.SCT.harmony; see ?make.names for more details
## on syntax validity
filtered_seurat_Harmony <- RunUMAP(filtered_seurat_Harmony, reduction = "harmony", dims = 1:15, verbose=F)
filtered_seurat_Harmony <- FindNeighbors(filtered_seurat_Harmony, reduction = "harmony", dims = 1:15, verbose=F)
filtered_seurat_Harmony <- FindClusters(filtered_seurat_Harmony, resolution = 0.5, verbose=F)

DimPlot(filtered_seurat_Harmony, group.by = c("seurat_clusters"),
              pt.size = 1, 
              ncol = 1, 
              label = TRUE, label.size = 6) + NoLegend()

d) LogNormalize and MNN

# Go back to the RNA assay that was lognormalized
DefaultAssay(filtered_seurat) <- "RNA"
filtered_seurat_MNN <- RunFastMNN(object.list = SplitObject(filtered_seurat, split.by = "orig.ident"), k = 20, 
                                  auto.merge = TRUE, verbose=F)
## No variable features found for object1 in the object.list. Running FindVariableFeatures ...
## No variable features found for object2 in the object.list. Running FindVariableFeatures ...
## No variable features found for object3 in the object.list. Running FindVariableFeatures ...
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_
filtered_seurat_MNN <- RunUMAP(filtered_seurat_MNN, reduction = "mnn", dims = 1:15, verbose=F)
filtered_seurat_MNN <- FindNeighbors(filtered_seurat_MNN, reduction = "mnn", dims = 1:15, verbose=F)
filtered_seurat_MNN <- FindClusters(filtered_seurat_MNN, resolution = 0.5, verbose=F)

DimPlot(filtered_seurat_MNN, group.by = c("seurat_clusters"),
              pt.size = 1, 
              ncol = 1, 
              label = TRUE, label.size = 6) + NoLegend()

e) Compare methods

# Rename cluster columns for merging downstream
head(filtered_seurat_MNN@meta.data)
colnames(filtered_seurat_MNN@meta.data)[8] <- "mnn_clusters"
head(filtered_seurat_Harmony@meta.data)
colnames(filtered_seurat_Harmony@meta.data)[8] <- "harmony_clusters"

# Merge the metadata df
metadata <- merge(filtered_seurat_MNN@meta.data, filtered_seurat_Harmony@meta.data, by=c("row.names", "orig.ident"))
rownames(metadata) <- metadata$Row.names
metadata$Row.names <- NULL
head(metadata)
# Replace the metadata df in the seurat objects
filtered_seurat_MNN@meta.data <- metadata
filtered_seurat_Harmony@meta.data <- metadata

# Sanity checks
# DimPlot(filtered_seurat_MNN, group.by="mnn_clusters", pt.size = 0.1, label = TRUE) + labs(title="LogNorm+MNN") + theme(legend.position="none")
# DimPlot(filtered_seurat_Harmony, group.by="harmony_clusters", pt.size = 0.1, label = TRUE) + labs(title="SCT+Harmony") + theme(legend.position="none")

# Check which cluster corresponds to which across methods
mnn1 <- DimPlot(filtered_seurat_MNN, group.by="mnn_clusters", pt.size = 0.1, label = TRUE) +
  labs(title="LogNorm+MNN | Clust on MNN") + theme(legend.position="none")
mnn2 <- DimPlot(filtered_seurat_MNN, group.by="harmony_clusters", pt.size = 0.1, label = TRUE) +
  labs(title="LogNorm+MNN | Clust on Harmony") + theme(legend.position="none")
harm1 <- DimPlot(filtered_seurat_Harmony, group.by="harmony_clusters", pt.size = 0.1, label = TRUE) + labs(title="SCT+Harmony | Clust on Harmony") + theme(legend.position="none")
harm2 <- DimPlot(filtered_seurat_Harmony, group.by="mnn_clusters", pt.size = 0.1, label = TRUE) + labs(title="SCT+Harmony  | Clust on MNN") + theme(legend.position="none")

ggdraw()+
  draw_plot(mnn1, x=0,  y=.5, width=.5, height=.5)+
  draw_plot(mnn2, x=.5, y=.5, width=.5, height=.5)+
  draw_plot(harm2, x=0, y=0, width=.5, height=.5) +
  draw_plot(harm1, x=.5, y=0, width=.5, height=.5)

# Function to look at genes of interest
plotFeat <- function(gene){
  mnn <- FeaturePlot(filtered_seurat_MNN, features=gene) +
    labs(title=gene)
  harm <- FeaturePlot(filtered_seurat_Harmony, features=gene) +
    labs(title=gene)
  # Plot both side-by-side
  mnn | harm
}

# Look at genes of interest (with MNN umap on the left, Harmony umap on the right)
for(i in genes.of.interest){
  print(plotFeat(i))
}